Wholesale customers Data Set (“Потребители оптовика”)
Данные об оптовых продажах за год в Португалии.
sales.data <- read.csv2("Wholesale.csv", sep = ",")
head(sales.data)
## Channel Region Fresh Milk Grocery Frozen Detergents_Paper Delicassen
## 1 2 3 12669 9656 7561 214 2674 1338
## 2 2 3 7057 9810 9568 1762 3293 1776
## 3 2 3 6353 8808 7684 2405 3516 7844
## 4 1 3 13265 1196 4221 6404 507 1788
## 5 2 3 22615 5410 7198 3915 1777 5185
## 6 2 3 9413 8259 5126 666 1795 1451
Смешаем признаки, чтобы было интереснее смотреть на значимое число дискриминантных функций.
sales.data$Channel <- as.factor(sales.data$Channel)
sales.data$Region <- as.factor(sales.data$Region)
sales.data$ChannelAndRegion <- with(sales.data, interaction(Channel, Region))
levels(sales.data$Channel)
## [1] "1" "2"
levels(sales.data$Region)
## [1] "1" "2" "3"
levels(sales.data$ChannelAndRegion)
## [1] "1.1" "2.1" "1.2" "2.2" "1.3" "2.3"
Цвета для matrixplot’а.
sales.data <- sales.data[,c("ChannelAndRegion", "Fresh","Milk","Grocery","Frozen","Detergents_Paper","Delicassen")]
factor.colors <- palette(c(rgb(134,82,187, maxColorValue=255),
rgb(148,195,92, maxColorValue=255),
rgb(189,85,126, maxColorValue=255),
rgb(146,180,183, maxColorValue=255),
rgb(196,112,62, maxColorValue=255),
rgb(75,64,63, maxColorValue=255)))
Колчичество индивидов по группам.
sapply(levels(sales.data$ChannelAndRegion), function(level){
nrow(sales.data[sales.data$ChannelAndRegion == level,])
})
## 1.1 2.1 1.2 2.2 1.3 2.3
## 59 18 28 19 211 105
Matrixplot.
pairs.panels(sales.data[,c("Fresh","Milk","Grocery","Frozen","Detergents_Paper","Delicassen")],
bg=factor.colors[sales.data$ChannelAndRegion],
pch=21,
lm=TRUE,
lwd = 1,
ellipses = FALSE)
Прологорифмируем все.
sales.data.log <- sales.data
sales.data.log$Fresh <- log(sales.data.log$Fresh)
sales.data.log$Milk <- log(sales.data.log$Milk)
sales.data.log$Grocery <- log(sales.data.log$Grocery)
sales.data.log$Frozen <- log(sales.data.log$Frozen)
sales.data.log$Detergents_Paper <- log(sales.data.log$Detergents_Paper)
sales.data.log$Delicassen <- log(sales.data.log$Delicassen)
Снова посмотрим на matrixplot.
nums <- c("Fresh","Milk","Grocery","Frozen","Detergents_Paper","Delicassen")
pairs.panels(sales.data.log[,nums],
bg=factor.colors[sales.data.log$ChannelAndRegion],
pch=21,
lm=TRUE,
lwd = 1,
ellipses = FALSE)
Уберем аутлайер, который видно в строке с Grocery.
row <- which.min(sales.data.log[,"Grocery"])
sales.data.log <- sales.data.log[-row,]
Проверим нормальность.
mshapiro.test(t(as.matrix(sales.data.log[,nums])))
##
## Shapiro-Wilk normality test
##
## data: Z
## W = 0.88598, p-value < 2.2e-16
Гипотеза о номальости отвергается.
Проверим равенство ковариационных матриц.
boxM(sales.data.log[,nums], sales.data.log$ChannelAndRegion)
##
## Box's M-test for Homogeneity of Covariance Matrices
##
## data: sales.data.log[, nums]
## Chi-Sq (approx.) = 319.99, df = 105, p-value < 2.2e-16
Гипотеза о равенсвте ковариационных матриц отвергается.
Wilks’ Lambda Test:
sales.manova <- manova(cbind(Fresh,Milk,Grocery,Frozen,Detergents_Paper,Delicassen) ~ ChannelAndRegion , data = sales.data.log)
summary(sales.manova, test = "Wilks")
## Df Wilks approx F num Df den Df Pr(>F)
## ChannelAndRegion 5 0.39363 14.997 30 1714 < 2.2e-16 ***
## Residuals 433
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Гипотеза о равенстве средних по группам отвергается, разница между группами есть. (Все это верно в нормальной модели, которой здесь нет.)
Pooled Covarience Matrix
cov.list <- lapply(levels(sales.data.log$ChannelAndRegion), function(group){
(nrow(sales.data.log[sales.data.log$ChannelAndRegion == group,nums]) - 1) * cov(sales.data.log[sales.data.log$ChannelAndRegion == group,nums])
})
cov.pooled <- Reduce("+",cov.list)/(nrow(sales.data.log) - length(levels(sales.data.log$ChannelAndRegion)))
Расстояние Махаланобиса между группами (Hotelling T2 test).
mach <- pairwise.mahalanobis(sales.data.log[,nums], grouping = sales.data.log$ChannelAndRegion, cov = cov.pooled)
print(mach)
## $means
## 1 2 3 4 5 6
## 1.1 9.001382 7.756570 7.961288 7.573781 6.057642 6.556345
## 2.1 7.902661 9.124783 9.656924 7.271975 8.726458 7.194342
## 1.2 8.700916 7.431286 8.190612 7.819395 5.784568 6.652988
## 2.2 8.305092 8.837793 9.457913 6.864545 8.665397 6.638330
## 1.3 8.896088 7.709915 7.922872 7.458495 5.931266 6.528948
## 2.3 8.462722 9.040616 9.491755 6.769726 8.553101 6.913929
##
## $cov
## Fresh Milk Grocery Frozen
## Fresh 2.14050968 0.1235884 -0.014162411 0.671802816
## Milk 0.12358843 0.7825135 0.454890227 0.126308889
## Grocery -0.01416241 0.4548902 0.598416920 0.001437061
## Frozen 0.67180282 0.1263089 0.001437061 1.553779056
## Detergents_Paper -0.07665753 0.4795283 0.543787961 -0.067978106
## Delicassen 0.55015096 0.3762369 0.222884279 0.481325520
## Detergents_Paper Delicassen
## Fresh -0.07665753 0.5501510
## Milk 0.47952833 0.3762369
## Grocery 0.54378796 0.2228843
## Frozen -0.06797811 0.4813255
## Detergents_Paper 1.36805416 0.1690879
## Delicassen 0.16908785 1.7046792
##
## $distance
## [,1] [,2] [,3] [,4] [,5] [,6]
## [1,] 0.0000000 6.7703842 1.0105616 5.8524515 0.0244341 5.6786027
## [2,] 6.7703842 0.0000000 7.4268749 0.5362603 7.1439997 0.5766334
## [3,] 1.0105616 7.4268749 0.0000000 6.9662658 0.9662989 7.0933574
## [4,] 5.8524515 0.5362603 6.9662658 0.0000000 6.2212546 0.1630954
## [5,] 0.0244341 7.1439997 0.9662989 6.2212546 0.0000000 6.0067245
## [6,] 5.6786027 0.5766334 7.0933574 0.1630954 6.0067245 0.0000000
P-values (F distr.):
num.of.groups <- length(levels(sales.data.log$ChannelAndRegion))
p.vals <- matrix(0, nrow = num.of.groups, ncol = num.of.groups)
rownames(p.vals) <- levels(sales.data.log$ChannelAndRegion)
colnames(p.vals) <- levels(sales.data.log$ChannelAndRegion)
for(i in 1:num.of.groups){
for(j in i:num.of.groups){
n1 <- nrow(sales.data.log[sales.data.log$ChannelAndRegion == levels(sales.data.log$ChannelAndRegion)[i],])
n2 <- nrow(sales.data.log[sales.data.log$ChannelAndRegion == levels(sales.data.log$ChannelAndRegion)[j],])
m <- n1 + n2
p <- length(nums)
scaler <- 1/(1/n1 + 1/n2) * (m - p + 1)/(m*p)
p.vals[i,j] <- 1 - pf(scaler * mach$distance[i,j], p, m - p + 1)
p.vals[j,i] <- p.vals[i,j]
}
}
print(p.vals, digits = 3)
## 1.1 2.1 1.2 2.2 1.3 2.3
## 1.1 1.00e+00 8.45e-11 1.03e-02 5.17e-10 9.81e-01 0.000
## 2.1 8.45e-11 1.00e+00 8.91e-08 6.41e-01 0.00e+00 0.214
## 1.2 1.03e-02 8.91e-08 1.00e+00 1.09e-07 9.98e-04 0.000
## 2.2 5.17e-10 6.41e-01 1.09e-07 1.00e+00 1.11e-16 0.865
## 1.3 9.81e-01 0.00e+00 9.98e-04 1.11e-16 1.00e+00 0.000
## 2.3 0.00e+00 2.14e-01 0.00e+00 8.65e-01 0.00e+00 1.000
Гипотеза о попарном равенстве средних, для групп 2.1, 2.2, 2.3 не отвергается. То же самое можно сказать про группы 1.1 и 1.3. Остальные p-value меньше 0.05.
sales.lda <- lda(sales.data.log[,nums], sales.data.log$ChannelAndRegion)
sales.lda
## Call:
## lda(sales.data.log[, nums], sales.data.log$ChannelAndRegion)
##
## Prior probabilities of groups:
## 1.1 2.1 1.2 2.2 1.3 2.3
## 0.13439636 0.04100228 0.06378132 0.04328018 0.47835991 0.23917995
##
## Group means:
## Fresh Milk Grocery Frozen Detergents_Paper Delicassen
## 1.1 9.001382 7.756570 7.961288 7.573781 6.057642 6.556345
## 2.1 7.902661 9.124783 9.656924 7.271975 8.726458 7.194342
## 1.2 8.700916 7.431286 8.190612 7.819395 5.784568 6.652988
## 2.2 8.305092 8.837793 9.457913 6.864545 8.665397 6.638330
## 1.3 8.896088 7.709915 7.922872 7.458495 5.931266 6.528948
## 2.3 8.462722 9.040616 9.491755 6.769726 8.553101 6.913929
##
## Coefficients of linear discriminants:
## LD1 LD2 LD3 LD4
## Fresh -0.0396896934 0.2734213 -0.5263659 -0.04083656
## Milk 0.1117766413 1.0862091 0.6899832 0.53074717
## Grocery 0.4657727887 -1.4867393 -0.9360518 0.48138177
## Frozen -0.1422786729 -0.4292839 0.5045370 -0.35985035
## Detergents_Paper 0.5546018618 0.2902393 0.1930161 -0.70847849
## Delicassen 0.0003849479 -0.1363020 0.2111597 0.43925071
## LD5
## Fresh 0.46060960
## Milk 0.09703529
## Grocery 0.09739006
## Frozen 0.36298710
## Detergents_Paper 0.06914301
## Delicassen 0.05847696
##
## Proportion of trace:
## LD1 LD2 LD3 LD4 LD5
## 0.9409 0.0455 0.0088 0.0038 0.0010
Plug-in classification table
sales.lda.predict <- predict(sales.lda, sales.data.log[,nums])$class
table(sales.lda.predict, sales.data.log$ChannelAndRegion) #Качество классификации
##
## sales.lda.predict 1.1 2.1 1.2 2.2 1.3 2.3
## 1.1 0 0 0 0 0 0
## 2.1 0 1 0 0 0 0
## 1.2 1 0 2 0 2 0
## 2.2 0 0 0 0 0 0
## 1.3 53 2 25 2 190 8
## 2.3 5 15 1 17 18 97
Cross-validation table
sales.lda <- lda(sales.data.log[,nums], sales.data.log$ChannelAndRegion, CV = TRUE)
table(sales.lda$class, sales.data.log$ChannelAndRegion) #cross-validation
##
## 1.1 2.1 1.2 2.2 1.3 2.3
## 1.1 0 0 0 0 0 0
## 2.1 0 0 0 0 0 0
## 1.2 1 0 1 0 2 0
## 2.2 0 0 0 0 0 0
## 1.3 53 2 26 2 189 8
## 2.3 5 16 1 17 19 97
Out-of-sample
sales.train <- sales.data.log[seq(1,nrow(sales.data.log),2),]
sales.unknown <- sales.data.log[-seq(1,nrow(sales.data.log),2),]
sales.lda <- lda(sales.train[,nums], sales.train$ChannelAndRegion)
sales.lda
## Call:
## lda(sales.train[, nums], sales.train$ChannelAndRegion)
##
## Prior probabilities of groups:
## 1.1 2.1 1.2 2.2 1.3 2.3
## 0.13181818 0.04090909 0.05454545 0.05454545 0.45454545 0.26363636
##
## Group means:
## Fresh Milk Grocery Frozen Detergents_Paper Delicassen
## 1.1 8.930045 7.831360 7.988323 7.547163 6.291285 6.367474
## 2.1 8.195412 9.176180 9.666030 7.573735 8.949307 7.155830
## 1.2 8.606798 7.474090 8.402664 7.867956 6.040711 6.620134
## 2.2 8.337416 9.061595 9.556045 6.710669 8.811622 6.418384
## 1.3 8.946080 7.772489 7.869457 7.633361 5.855567 6.614237
## 2.3 8.504103 9.027258 9.524591 6.631687 8.548342 6.916502
##
## Coefficients of linear discriminants:
## LD1 LD2 LD3 LD4
## Fresh 0.022882343 0.2383549 -0.2874884 -0.241828840
## Milk 0.210242390 1.2421383 0.1757561 0.429071703
## Grocery 0.306202601 -1.8390170 -0.7365692 -0.136321921
## Frozen -0.262471915 -0.3458249 0.7223686 -0.001420295
## Detergents_Paper 0.617210223 0.4489793 0.5532875 -0.236530504
## Delicassen 0.002034182 -0.1351499 -0.1024414 0.660915243
## LD5
## Fresh 0.29574320
## Milk -0.86162878
## Grocery -0.45044941
## Frozen -0.08988061
## Detergents_Paper 0.66048779
## Delicassen 0.41398995
##
## Proportion of trace:
## LD1 LD2 LD3 LD4 LD5
## 0.9100 0.0631 0.0178 0.0082 0.0009
Plug-in classification table
sales.lda.predict <- predict(sales.lda, sales.unknown[,nums])$class
table(sales.lda.predict, sales.unknown$ChannelAndRegion) #Качество классификации
##
## sales.lda.predict 1.1 2.1 1.2 2.2 1.3 2.3
## 1.1 1 0 0 0 0 0
## 2.1 0 1 0 0 0 0
## 1.2 1 0 1 0 5 0
## 2.2 0 0 0 0 0 0
## 1.3 26 1 15 1 91 3
## 2.3 2 7 0 6 14 44
partimat(ChannelAndRegion ~ ., sales.data.log[,c(nums, "ChannelAndRegion")], method="lda")
sales.qda <- qda(sales.data.log[,nums], sales.data.log$ChannelAndRegion)
sales.qda
## Call:
## qda(sales.data.log[, nums], sales.data.log$ChannelAndRegion)
##
## Prior probabilities of groups:
## 1.1 2.1 1.2 2.2 1.3 2.3
## 0.13439636 0.04100228 0.06378132 0.04328018 0.47835991 0.23917995
##
## Group means:
## Fresh Milk Grocery Frozen Detergents_Paper Delicassen
## 1.1 9.001382 7.756570 7.961288 7.573781 6.057642 6.556345
## 2.1 7.902661 9.124783 9.656924 7.271975 8.726458 7.194342
## 1.2 8.700916 7.431286 8.190612 7.819395 5.784568 6.652988
## 2.2 8.305092 8.837793 9.457913 6.864545 8.665397 6.638330
## 1.3 8.896088 7.709915 7.922872 7.458495 5.931266 6.528948
## 2.3 8.462722 9.040616 9.491755 6.769726 8.553101 6.913929
Plug-in classification table
sales.qda.predict <- predict(sales.qda, sales.data.log[,nums])$class
table(sales.qda.predict, sales.data.log$ChannelAndRegion) #Качество классификации
##
## sales.qda.predict 1.1 2.1 1.2 2.2 1.3 2.3
## 1.1 2 0 0 0 3 0
## 2.1 0 2 0 0 0 3
## 1.2 1 0 4 0 1 0
## 2.2 0 0 0 5 0 0
## 1.3 50 3 24 1 183 6
## 2.3 6 13 0 13 23 96
Cross-validation table
sales.qda <- qda(sales.data.log[,nums], sales.data.log$ChannelAndRegion, CV = TRUE)
table(sales.qda$class, sales.data.log$ChannelAndRegion) #cross-validation
##
## 1.1 2.1 1.2 2.2 1.3 2.3
## 1.1 0 0 0 0 4 0
## 2.1 0 1 0 0 0 4
## 1.2 1 0 2 0 2 0
## 2.2 0 0 0 2 0 2
## 1.3 52 3 26 2 181 11
## 2.3 6 14 0 15 23 88
Out-of-sample
sales.qda <- qda(sales.train[,nums], sales.train$ChannelAndRegion)
sales.qda
## Call:
## qda(sales.train[, nums], sales.train$ChannelAndRegion)
##
## Prior probabilities of groups:
## 1.1 2.1 1.2 2.2 1.3 2.3
## 0.13181818 0.04090909 0.05454545 0.05454545 0.45454545 0.26363636
##
## Group means:
## Fresh Milk Grocery Frozen Detergents_Paper Delicassen
## 1.1 8.930045 7.831360 7.988323 7.547163 6.291285 6.367474
## 2.1 8.195412 9.176180 9.666030 7.573735 8.949307 7.155830
## 1.2 8.606798 7.474090 8.402664 7.867956 6.040711 6.620134
## 2.2 8.337416 9.061595 9.556045 6.710669 8.811622 6.418384
## 1.3 8.946080 7.772489 7.869457 7.633361 5.855567 6.614237
## 2.3 8.504103 9.027258 9.524591 6.631687 8.548342 6.916502
Plug-in classification table
sales.qda.predict <- predict(sales.qda, sales.unknown[,nums])$class
table(sales.qda.predict, sales.unknown$ChannelAndRegion) #Качество классификации
##
## sales.qda.predict 1.1 2.1 1.2 2.2 1.3 2.3
## 1.1 1 0 0 0 3 0
## 2.1 0 2 0 1 0 3
## 1.2 2 0 1 0 0 1
## 2.2 1 0 0 0 1 5
## 1.3 24 1 15 1 92 4
## 2.3 2 6 0 5 14 34
partimat(ChannelAndRegion ~ ., sales.data.log[,c(nums, "ChannelAndRegion")], method="qda")
sales.data.log$ChannelAndRegion <- mapvalues(sales.data.log$ChannelAndRegion, from = c("2.1", "2.2", "2.3"), to = c("Shops", "Shops", "Shops"))
sales.data.log$ChannelAndRegion <- mapvalues(sales.data.log$ChannelAndRegion, from = c("1.1", "1.3"), to = c("HotelsLR", "HotelsLR"))
sales.data.log$ChannelAndRegion <- mapvalues(sales.data.log$ChannelAndRegion, from = "1.2", to = "HotelsP")
sapply(levels(sales.data.log$ChannelAndRegion), function(level){
nrow(sales.data.log[sales.data.log$ChannelAndRegion == level,])
})
## HotelsLR Shops HotelsP
## 269 142 28
sales.lda <- lda(sales.data.log[,nums], sales.data.log$ChannelAndRegion)
sales.lda
## Call:
## lda(sales.data.log[, nums], sales.data.log$ChannelAndRegion)
##
## Prior probabilities of groups:
## HotelsLR Shops HotelsP
## 0.61275626 0.32346241 0.06378132
##
## Group means:
## Fresh Milk Grocery Frozen Detergents_Paper Delicassen
## HotelsLR 8.919183 7.720148 7.931298 7.483780 5.958984 6.534957
## Shops 8.370637 9.024146 9.508164 6.846079 8.590101 6.912599
## HotelsP 8.700916 7.431286 8.190612 7.819395 5.784568 6.652988
##
## Coefficients of linear discriminants:
## LD1 LD2
## Fresh -0.0367761814 -0.1703317
## Milk 0.1138712993 -1.1694579
## Grocery 0.4658743718 1.6489755
## Frozen -0.1478409895 0.3239726
## Detergents_Paper 0.5554846945 -0.3446850
## Delicassen -0.0007181123 0.1139285
##
## Proportion of trace:
## LD1 LD2
## 0.9595 0.0405
Plug-in classification table
sales.lda.predict <- predict(sales.lda, sales.data.log[,nums])$class
table(sales.lda.predict, sales.data.log$ChannelAndRegion) #Качество классификации
##
## sales.lda.predict HotelsLR Shops HotelsP
## HotelsLR 243 12 26
## Shops 23 130 1
## HotelsP 3 0 1
Cross-validation table
sales.lda <- lda(sales.data.log[,nums], sales.data.log$ChannelAndRegion, CV = TRUE)
table(sales.lda$class, sales.data.log$ChannelAndRegion) #cross-validation
##
## HotelsLR Shops HotelsP
## HotelsLR 242 12 26
## Shops 24 130 1
## HotelsP 3 0 1
Out-of-sample
sales.train <- sales.data.log[seq(1,nrow(sales.data.log),2),]
sales.unknown <- sales.data.log[-seq(1,nrow(sales.data.log),2),]
sales.lda <- lda(sales.train[,nums], sales.train$ChannelAndRegion)
sales.lda
## Call:
## lda(sales.train[, nums], sales.train$ChannelAndRegion)
##
## Prior probabilities of groups:
## HotelsLR Shops HotelsP
## 0.58636364 0.35909091 0.05454545
##
## Group means:
## Fresh Milk Grocery Frozen Detergents_Paper Delicassen
## HotelsLR 8.942476 7.785723 7.896179 7.613983 5.953519 6.558763
## Shops 8.443616 9.049440 9.545482 6.751006 8.634013 6.868104
## HotelsP 8.606798 7.474090 8.402664 7.867956 6.040711 6.620134
##
## Coefficients of linear discriminants:
## LD1 LD2
## Fresh 0.01961188 -0.19667436
## Milk 0.20985568 -1.28921833
## Grocery 0.32767711 1.90387744
## Frozen -0.26903331 0.28970854
## Detergents_Paper 0.60093136 -0.47452935
## Delicassen 0.01450623 0.09234381
##
## Proportion of trace:
## LD1 LD2
## 0.9369 0.0631
Plug-in classification table
sales.lda.predict <- predict(sales.lda, sales.unknown[,nums])$class
table(sales.lda.predict, sales.unknown$ChannelAndRegion) #Качество классификации
##
## sales.lda.predict HotelsLR Shops HotelsP
## HotelsLR 121 5 15
## Shops 13 58 0
## HotelsP 6 0 1
partimat(ChannelAndRegion ~ ., sales.data.log[,c(nums, "ChannelAndRegion")], method="lda")
sales.qda <- qda(sales.data.log[,nums], sales.data.log$ChannelAndRegion)
sales.qda
## Call:
## qda(sales.data.log[, nums], sales.data.log$ChannelAndRegion)
##
## Prior probabilities of groups:
## HotelsLR Shops HotelsP
## 0.61275626 0.32346241 0.06378132
##
## Group means:
## Fresh Milk Grocery Frozen Detergents_Paper Delicassen
## HotelsLR 8.919183 7.720148 7.931298 7.483780 5.958984 6.534957
## Shops 8.370637 9.024146 9.508164 6.846079 8.590101 6.912599
## HotelsP 8.700916 7.431286 8.190612 7.819395 5.784568 6.652988
Plug-in classification table
sales.qda.predict <- predict(sales.qda, sales.data.log[,nums])$class
table(sales.qda.predict, sales.data.log$ChannelAndRegion) #Качество классификации
##
## sales.qda.predict HotelsLR Shops HotelsP
## HotelsLR 237 10 26
## Shops 29 132 0
## HotelsP 3 0 2
Cross-validation table
sales.qda <- qda(sales.data.log[,nums], sales.data.log$ChannelAndRegion, CV = TRUE)
table(sales.qda$class, sales.data.log$ChannelAndRegion) #cross-validation
##
## HotelsLR Shops HotelsP
## HotelsLR 237 13 27
## Shops 29 129 0
## HotelsP 3 0 1
Out-of-sample
sales.qda <- qda(sales.train[,nums], sales.train$ChannelAndRegion)
sales.qda
## Call:
## qda(sales.train[, nums], sales.train$ChannelAndRegion)
##
## Prior probabilities of groups:
## HotelsLR Shops HotelsP
## 0.58636364 0.35909091 0.05454545
##
## Group means:
## Fresh Milk Grocery Frozen Detergents_Paper Delicassen
## HotelsLR 8.942476 7.785723 7.896179 7.613983 5.953519 6.558763
## Shops 8.443616 9.049440 9.545482 6.751006 8.634013 6.868104
## HotelsP 8.606798 7.474090 8.402664 7.867956 6.040711 6.620134
Plug-in classification table
sales.qda.predict <- predict(sales.qda, sales.unknown[,nums])$class
table(sales.qda.predict, sales.unknown$ChannelAndRegion) #Качество классификации
##
## sales.qda.predict HotelsLR Shops HotelsP
## HotelsLR 122 6 15
## Shops 17 56 0
## HotelsP 1 1 1
partimat(ChannelAndRegion ~ ., sales.data.log[,c(nums, "ChannelAndRegion")], method="qda")